*************************************************************************************************
*																								*
*						Flexible Wages, Bargaining, and The Gender Gap							*
*						Barbara Biasi and Heather Sarsons										*
*						Abraham-Sun (2020) test													*
*																								*
*************************************************************************************************


*-------------------> ABRAHAM-SUN SET UP <-------------------*

g reltime = year-extension
g l_5 = (reltime==-5)
g l_4 = (reltime==-4)
g l_3 = (reltime==-3)
g l_2 = (reltime==-2)
g l_1 = (reltime==-1)
g l0 = (reltime==0)
g l1 = (reltime==1)
g l2 = (reltime==2)
g l3 = (reltime==3)
g l4 = (reltime==4)
g l5 = (reltime==5)

*bys id: egen treated = min(extension)
g t = extension if year==2011
bys id: egen treated = max(t)
drop t
g t1 = treated==2011
g t2 = treated==2012
g t3 = treated==2013
g t4 = treated==2014
g t5 = treated==2016

/* UNCOMMENT TO CALCULATE EVENT STUDY WEIGHTS
eventstudyweights l_5 l_4 l_3 l_2 l_1 l0 l1 l2 l3 l4 l5, controls(i.district_code i.year) cohort(treated) rel_time(reltime) saveweights("weights")

preserve
	insheet using weights.csv, clear

	keep l_2 treated reltime
	reshape wide l_2, i(reltime) j(treated)

	drop if reltime<-5
	graph twoway line l_2* reltime, legend(order(1 "2011" 2 "2012" 3 "2013" 4 "2014" 5 "2016") rows(1)) ///
		xla(-5(1)5) graphreg(fc(white)) yla(-0.5(0.25)0.5) xtitle("Relative Time") ytitle("Weights")
	graph export $out/weights_AS2020.png, replace
	egen w_sum = rowtotal(l_2*)
restore	
*/

* Generate counts for each cohort
count if treated==2011 & year==2011
global c_2011 = r(N)
count if treated==2012 & year==2011
global c_2012 = r(N)
count if treated==2013 & year==2011
global c_2013 = r(N)
count if treated==2014 & year==2011
global c_2014 = r(N)
count if treated==2016 & year==2011
global c_2016 = r(N)

*qui tab reltime
/*
capt drop timex
gen timex = year - ext
qui tab timex
forvalues n = 1/ `r(r)' {
local z = `n' - 8
gen femx_`n' = female * (reltime == `z')
replace femx_`n' = . if reltime == .
label var femx_`n' "`z'"
}
egen Timex = group(reltime)
*/
capt g zero=0
label var zero "0"

save AS_setup.dta, replace

*----------------> OVERALL GAP

	use AS_setup.dta, clear

	reghdfe logsalary femx_5-femx_9 zero femx_11-femx_15 female, a($exp i.extension##i.year) vce(cluster district_code)

	// store results
	mat b = e(b)
	mat b = b[.,1..11]
	mat V = vecdiag(e(V))
	mat V = V[.,1..11]	
	mat b2way_hrs = b', V'
		
	// exclude the last cohort because it will be used as the control units (not sure if we need to do this)
	foreach vv of varlist femx_5-femx_9 zero femx_11-femx_15 {
		replace `vv' = 0 if treated == 2016
	}

	preserve
		foreach vv of varlist femx_5-femx_9 zero femx_11-femx_15 {
			replace `vv' = . if treated == 2016
		}  
		qui reghdfe t1 if treated<2016, a($exp i.year) resid
			predict res1, resid		
		qui reghdfe t2 if treated<2016, a($exp i.year) resid
			predict res2, resid		
		qui reghdfe t3 if treated<2016, a($exp i.year) resid
			predict res3, resid
		qui reghdfe t4 if treated<2016, a($exp i.year) resid
			predict res4, resid
			
		reg res1 femx_5-femx_9 zero femx_11-femx_15 if treated<2016
			est store r1
		reg res2 femx_5-femx_9 zero femx_11-femx_15 if treated<2016
			est store r2
		reg res3 femx_5-femx_9 zero femx_11-femx_15 if treated<2016
			est store r3
		reg res4 femx_5-femx_9 zero femx_11-femx_15 if treated<2016
			est store r4
			
		suest r1 r2 r3 r4
		matrix V = e(V)
		matrix list V
		matselrc V Vcov, r(1/10,13/22,25/34,37/46) c(1/10,13/22,25/34,37/46)
	restore

	foreach vv of varlist femx_5-femx_9 zero femx_11-femx_15 {
		forval i=2011/2016 {
			gen `vv'_`i' = `vv' * (ext==`i')
		}
	}

	global rhs_rel_year_i ""
	global rhs_rel_year_i "$rhs_rel_year_i femx_5_2011 femx_6_2011 femx_7_2011 femx_8_2011 femx_9_2011 femx_11_2011 femx_12_2011 femx_13_2011 femx_14_2011 femx_15_2011"
	global rhs_rel_year_i "$rhs_rel_year_i femx_5_2012 femx_6_2012 femx_7_2012 femx_8_2012 femx_9_2012 femx_11_2012 femx_12_2012 femx_13_2012 femx_14_2012 femx_15_2012"
	global rhs_rel_year_i "$rhs_rel_year_i femx_5_2013 femx_6_2013 femx_7_2013 femx_8_2013 femx_9_2013 femx_11_2013 femx_12_2013 femx_13_2013 femx_14_2013 femx_15_2013"
	global rhs_rel_year_i "$rhs_rel_year_i femx_5_2014 femx_6_2014 femx_7_2014 femx_8_2014 femx_9_2014 femx_11_2014 femx_12_2014 femx_13_2014 femx_14_2014 femx_15_2014"

	reghdfe logsalary $rhs_rel_year_i, a($exp i.year##i.extension) vce(cluster district_code) resid

	mat b = e(b)
	mat V = vecdiag(e(V))
	mat b_2011 = (b[.,1..10])
	mat V_2011 = (V[.,1..10])	
	mat b_2012 = (b[.,11..20])
	mat V_2012 = (V[.,11..20])
	mat b_2013 = (b[.,21..30])
	mat V_2013 = (V[.,21..30])
	mat b_2014 = (b[.,31..40])
	mat V_2014 = (V[.,31..40])

	mat biw_long = b // copy the coefficients from the saturated specification, to add to variance estimates later

		* Coef on event time 5 for cohorts 2012, 2013, 2014
		matselrc biw_long tempb, c(11,21,31)
		matselrc Vcov tempVcov, r(11,21,31) c(11,21,31)
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2012/($c_2012+$c_2013+$c_2014)*femx_5_2012 + $c_2013/($c_2012+$c_2013+$c_2014)*femx_5_2013 + $c_2014/($c_2012+$c_2013+$c_2014)*femx_5_2014
		matrix b = r(estimate)
		matrix V = r(se)^2 + temp
		
		* Coef on event time 6 for cohorts 2011, 2012, 2013, 2014
		matselrc biw_long tempb, c(2,12,22,32)
		matselrc Vcov tempVcov, r(2,12,22,32) c(2,12,22,32)
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_6_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_6_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_6_2013 + $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_6_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)

		* Coef on event time 7 for cohorts 2011, 2012, 2013, 2014
		matselrc biw_long tempb, c(3,13,23,33)
		matselrc Vcov tempVcov, r(3,13,23,33) c(3,13,23,33) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_7_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_7_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_7_2013+ $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_7_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 8 for cohorts 2011, 2012, 2013, 2014
		matselrc biw_long tempb, c(4,14,24,34)
		matselrc Vcov tempVcov, r(4,14,24,34) c(4,14,24,34)
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_8_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_8_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_8_2013 + $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_8_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 9 for cohorts 2011, 2012, 2013, 2014
		matselrc biw_long tempb, c(5,15,25,35)
		matselrc Vcov tempVcov, r(5,15,25,35) c(5,15,25,35) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_9_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_9_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_9_2013 + $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_9_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 10 for cohorts 2011, 2012, 2013, 2014
		* zero

		* Coef on event time 11 for cohorts 2011, 2012, 2013, 2014
		matselrc biw_long tempb, c(7,17,27,37)
		matselrc Vcov tempVcov, r(7,17,27,37) c(7,17,27,37) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_11_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_11_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_11_2013 + $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_11_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 12 for cohorts 2011, 2012, 2013
		matselrc biw_long tempb, c(8,18,28,38)
		matselrc Vcov tempVcov, r(8,18,28,38) c(8,18,28,38) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013+$c_2014)*femx_12_2011 + $c_2012/($c_2011+$c_2012+$c_2013+$c_2014)*femx_12_2012 + $c_2013/($c_2011+$c_2012+$c_2013+$c_2014)*femx_12_2013 + $c_2014/($c_2011+$c_2012+$c_2013+$c_2014)*femx_12_2014
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 13 for cohorts 2011, 2012, 2013
		matselrc biw_long tempb, c(8,19,29)
		matselrc Vcov tempVcov, r(8,19,29) c(8,19,29) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012+$c_2013)*femx_13_2011 + $c_2012/($c_2011+$c_2012+$c_2013)*femx_13_2012 + $c_2013/($c_2011+$c_2012+$c_2013)*femx_13_2013
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 14 for cohorts 2011, 2012
		matselrc biw_long tempb, c(9,19)
		matselrc Vcov tempVcov, r(9,19) c(10,19) 
		matrix temp = tempb*tempVcov*tempb'	
		lincom $c_2011/($c_2011+$c_2012)*femx_14_2011 + $c_2012/($c_2011+$c_2012)*femx_14_2012 
		matrix b = b \ r(estimate)
		matrix V = V \ (r(se)^2 + temp)	

		* Coef on event time 15 for cohorts 2011
		lincom femx_15_2011
		matrix b = b \ r(estimate)
		matrix V = V \ r(se)^2
		
		
		mat b = b[1..5,.] \ (0) \b[6..10,.]
		mat V = V[1..5,.] \ (0) \V[6..10,.]	
		mat b_2011=b_2011'
		mat b_2012=b_2012'
		mat b_2013=b_2013'
		mat b_2014=b_2014'	
		mat V_2011=V_2011'
		mat V_2012=V_2012'
		mat V_2013=V_2013'	
		mat V_2014=V_2014'		
		mat b_2011 = b_2011[1..5,.] \ (0) \b_2011[6..10,.]
		mat V_2011 = V_2011[1..5,.] \ (0) \V_2011[6..10,.]	
		mat b_2012 = b_2012[1..5,.] \ (0) \b_2012[6..10,.]
		mat V_2012 = V_2012[1..5,.] \ (0) \V_2012[6..10,.]	
		mat b_2013 = b_2013[1..5,.] \ (0) \b_2013[6..10,.]
		mat V_2013 = V_2013[1..5,.] \ (0) \V_2013[6..10,.]	
		mat b_2014 = b_2014[1..5,.] \ (0) \b_2014[6..10,.]
		mat V_2014 = V_2014[1..5,.] \ (0) \V_2014[6..10,.]	
		
		*mat biw_hrs = b,V,b_2011',V_2011',b_2012',V_2012',b_2013',V_2013'
		mat biw_hrs = b,V,b_2011,V_2011,b_2012,V_2012,b_2013,V_2013,b_2014,V_2014

	// output results to excel spreadsheet
	clear
	global n_obs = 11
	set obs $n_obs
	gen y = _n-5 // generate relative year

	* two-way FE estimates
	*mat b2way_hrs = b2way_hrs[1..4,.]\(0,0)\b2way_hrs[5..9,.] // fill in zeros for evt_time_4, which are normalized to zero
	svmat b2way_hrs 

	gen ci1_u = sqrt(b2way_hrs2)*1.96+b2way_hrs1
	gen ci1_l = -sqrt(b2way_hrs2)*1.96+b2way_hrs1
	gen sd1 = sqrt(b2way_hrs2)

	* IW estimates
	svmat biw_hrs

	gen ci2_u = sqrt(biw_hrs2)*1.96+biw_hrs1
	gen ci2_l = -sqrt(biw_hrs2)*1.96+biw_hrs1
	gen sd2 = sqrt(biw_hrs2)
	* CATTs estimates are biw_hrs3 - biw_hrs7
	gen sd2011 = sqrt(biw_hrs4)
	gen sd2012 = sqrt(biw_hrs6)
	gen sd2013 = sqrt(biw_hrs8)
	gen sd2014 = sqrt(biw_hrs10)
	order y b2way_hrs1 biw_hrs1 biw_hrs3 biw_hrs5 biw_hrs7 biw_hrs9 sd1 sd2 sd2011 sd2012 sd2013 sd2014

	export delimited using weighted_estimates.csv, replace
		


	

